import dalex as dx
import pandas as pd
import warnings
warnings.filterwarnings('ignore')
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score
from keras.models import Sequential
from keras.layers import Dense
import keras.backend as K
data = pd.read_csv('hotel_bookings.csv')
data.head()
| hotel | is_canceled | lead_time | arrival_date_year | arrival_date_month | arrival_date_week_number | arrival_date_day_of_month | stays_in_weekend_nights | stays_in_week_nights | adults | ... | deposit_type | agent | company | days_in_waiting_list | customer_type | adr | required_car_parking_spaces | total_of_special_requests | reservation_status | reservation_status_date | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Resort Hotel | 0 | 342 | 2015 | July | 27 | 1 | 0 | 0 | 2 | ... | No Deposit | NaN | NaN | 0 | Transient | 0.0 | 0 | 0 | Check-Out | 2015-07-01 |
| 1 | Resort Hotel | 0 | 737 | 2015 | July | 27 | 1 | 0 | 0 | 2 | ... | No Deposit | NaN | NaN | 0 | Transient | 0.0 | 0 | 0 | Check-Out | 2015-07-01 |
| 2 | Resort Hotel | 0 | 7 | 2015 | July | 27 | 1 | 0 | 1 | 1 | ... | No Deposit | NaN | NaN | 0 | Transient | 75.0 | 0 | 0 | Check-Out | 2015-07-02 |
| 3 | Resort Hotel | 0 | 13 | 2015 | July | 27 | 1 | 0 | 1 | 1 | ... | No Deposit | 304.0 | NaN | 0 | Transient | 75.0 | 0 | 0 | Check-Out | 2015-07-02 |
| 4 | Resort Hotel | 0 | 14 | 2015 | July | 27 | 1 | 0 | 2 | 2 | ... | No Deposit | 240.0 | NaN | 0 | Transient | 98.0 | 0 | 1 | Check-Out | 2015-07-03 |
5 rows × 32 columns
# in order to simplify plots I decided to use only a subset of variables
data = data[['is_canceled', 'lead_time', 'arrival_date_year', 'adults', 'children', 'babies', 'booking_changes']]
data = data.dropna()
X, y = data.loc[:, data.columns != 'is_canceled'], data[['is_canceled']]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
observation = pd.DataFrame({'lead_time': [203.0],
'arrival_date_year': [2016.0],
'adults': [2.0],
'children': [0.0],
'babies': [0.0],
'booking_changes': [4.]},
index = ['observation'])
def f1_metric(y_true, y_pred):
true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
precision = true_positives / (predicted_positives + K.epsilon())
recall = true_positives / (possible_positives + K.epsilon())
f1_val = 2*(precision*recall)/(precision+recall+K.epsilon())
return f1_val
model = Sequential()
model.add(Dense(100, input_dim=6, activation='sigmoid'))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy', f1_metric])
model.fit(X_train, y_train, epochs=150, batch_size=10)
Epoch 1/150 10745/10745 [==============================] - 23s 2ms/step - loss: 0.6294 - accuracy: 0.6431 - f1_metric: 0.1923 Epoch 2/150 10745/10745 [==============================] - 21s 2ms/step - loss: 0.6301 - accuracy: 0.6292 - f1_metric: 0.1184 Epoch 3/150 10745/10745 [==============================] - 21s 2ms/step - loss: 0.6114 - accuracy: 0.6600 - f1_metric: 0.2779 Epoch 4/150 10745/10745 [==============================] - 24s 2ms/step - loss: 0.6219 - accuracy: 0.6371 - f1_metric: 0.1628 - Epoch 5/150 10745/10745 [==============================] - 20s 2ms/step - loss: 0.6235 - accuracy: 0.6270 - f1_metric: 0.1388 Epoch 6/150 10745/10745 [==============================] - 20s 2ms/step - loss: 0.6131 - accuracy: 0.6565 - f1_metric: 0.2974 Epoch 7/150 10745/10745 [==============================] - 22s 2ms/step - loss: 0.6146 - accuracy: 0.6420 - f1_metric: 0.2362 Epoch 8/150 10745/10745 [==============================] - 20s 2ms/step - loss: 0.6112 - accuracy: 0.6615 - f1_metric: 0.2803 Epoch 9/150 10745/10745 [==============================] - 19s 2ms/step - loss: 0.6090 - accuracy: 0.6628 - f1_metric: 0.2776 Epoch 10/150 10745/10745 [==============================] - 23s 2ms/step - loss: 0.6097 - accuracy: 0.6624 - f1_metric: 0.2866 Epoch 11/150 10745/10745 [==============================] - 23s 2ms/step - loss: 0.6130 - accuracy: 0.6556 - f1_metric: 0.2499 Epoch 12/150 10745/10745 [==============================] - 23s 2ms/step - loss: 0.6094 - accuracy: 0.6623 - f1_metric: 0.2794 Epoch 13/150 10745/10745 [==============================] - 25s 2ms/step - loss: 0.6091 - accuracy: 0.6640 - f1_metric: 0.2689 Epoch 14/150 10745/10745 [==============================] - 22s 2ms/step - loss: 0.6099 - accuracy: 0.6603 - f1_metric: 0.2654 Epoch 15/150 10745/10745 [==============================] - 23s 2ms/step - loss: 0.6098 - accuracy: 0.6626 - f1_metric: 0.2577 Epoch 16/150 10745/10745 [==============================] - 23s 2ms/step - loss: 0.6096 - accuracy: 0.6573 - f1_metric: 0.3510 Epoch 17/150 10745/10745 [==============================] - 23s 2ms/step - loss: 0.6111 - accuracy: 0.6502 - f1_metric: 0.3392 Epoch 18/150 10745/10745 [==============================] - 24s 2ms/step - loss: 0.6093 - accuracy: 0.6571 - f1_metric: 0.2860 Epoch 19/150 10745/10745 [==============================] - 23s 2ms/step - loss: 0.6105 - accuracy: 0.6570 - f1_metric: 0.2723 3s - loss: 0.6109 - accuracy: - ETA: 2s - loss: 0.6109 - accuracy: 0.6564 - f1_metric: 0.27 - ETA: 2s - loss: 0 - ETA Epoch 20/150 10745/10745 [==============================] - 22s 2ms/step - loss: 0.6104 - accuracy: 0.6635 - f1_metric: 0.2755 Epoch 21/150 10745/10745 [==============================] - 22s 2ms/step - loss: 0.6090 - accuracy: 0.6635 - f1_metric: 0.2538 Epoch 22/150 10745/10745 [==============================] - 21s 2ms/step - loss: 0.6099 - accuracy: 0.6621 - f1_metric: 0.2672 Epoch 23/150 10745/10745 [==============================] - 21s 2ms/step - loss: 0.6101 - accuracy: 0.6606 - f1_metric: 0.2807 Epoch 24/150 10745/10745 [==============================] - 23s 2ms/step - loss: 0.6095 - accuracy: 0.6598 - f1_metric: 0.3025 Epoch 25/150 10745/10745 [==============================] - 23s 2ms/step - loss: 0.6101 - accuracy: 0.6556 - f1_metric: 0.2873 Epoch 26/150 10745/10745 [==============================] - 22s 2ms/step - loss: 0.6074 - accuracy: 0.6653 - f1_metric: 0.2829 Epoch 27/150 10745/10745 [==============================] - 23s 2ms/step - loss: 0.6071 - accuracy: 0.6657 - f1_metric: 0.2621 Epoch 28/150 10745/10745 [==============================] - 22s 2ms/step - loss: 0.6086 - accuracy: 0.6650 - f1_metric: 0.2629 Epoch 29/150 10745/10745 [==============================] - 23s 2ms/step - loss: 0.6102 - accuracy: 0.6637 - f1_metric: 0.2507 Epoch 30/150 10745/10745 [==============================] - 23s 2ms/step - loss: 0.6097 - accuracy: 0.6570 - f1_metric: 0.3216 0s - loss: 0.6099 - accuracy: 0.6569 - f1_metric: 0. Epoch 31/150 10745/10745 [==============================] - 23s 2ms/step - loss: 0.6066 - accuracy: 0.6652 - f1_metric: 0.2557 Epoch 32/150 10745/10745 [==============================] - 23s 2ms/step - loss: 0.6133 - accuracy: 0.6438 - f1_metric: 0.1708 Epoch 33/150 10745/10745 [==============================] - 24s 2ms/step - loss: 0.6124 - accuracy: 0.6424 - f1_metric: 0.2483 Epoch 34/150 10745/10745 [==============================] - 24s 2ms/step - loss: 0.6076 - accuracy: 0.6635 - f1_metric: 0.2860 Epoch 35/150 10745/10745 [==============================] - 23s 2ms/step - loss: 0.6099 - accuracy: 0.6631 - f1_metric: 0.3145 Epoch 36/150 10745/10745 [==============================] - 24s 2ms/step - loss: 0.6136 - accuracy: 0.6388 - f1_metric: 0.2997 Epoch 37/150 10745/10745 [==============================] - 24s 2ms/step - loss: 0.6153 - accuracy: 0.6305 - f1_metric: 0.2894 Epoch 38/150 10745/10745 [==============================] - 23s 2ms/step - loss: 0.6122 - accuracy: 0.6419 - f1_metric: 0.2989 Epoch 39/150 10745/10745 [==============================] - 23s 2ms/step - loss: 0.6103 - accuracy: 0.6620 - f1_metric: 0.3117 Epoch 40/150 10745/10745 [==============================] - 23s 2ms/step - loss: 0.6073 - accuracy: 0.6639 - f1_metric: 0.2582 Epoch 41/150 10745/10745 [==============================] - 24s 2ms/step - loss: 0.6098 - accuracy: 0.6572 - f1_metric: 0.2211 Epoch 42/150 10745/10745 [==============================] - 23s 2ms/step - loss: 0.6075 - accuracy: 0.6610 - f1_metric: 0.2269 Epoch 43/150 10745/10745 [==============================] - 23s 2ms/step - loss: 0.6090 - accuracy: 0.6631 - f1_metric: 0.3087 Epoch 44/150 10745/10745 [==============================] - 23s 2ms/step - loss: 0.6084 - accuracy: 0.6626 - f1_metric: 0.2721 Epoch 45/150 10745/10745 [==============================] - 23s 2ms/step - loss: 0.6073 - accuracy: 0.6636 - f1_metric: 0.2737 0s - loss: 0.6073 - accuracy: 0.6636 Epoch 46/150 10745/10745 [==============================] - 23s 2ms/step - loss: 0.6084 - accuracy: 0.6628 - f1_metric: 0.2779 Epoch 47/150 10745/10745 [==============================] - 23s 2ms/step - loss: 0.6077 - accuracy: 0.6631 - f1_metric: 0.2666 Epoch 48/150 10745/10745 [==============================] - 20s 2ms/step - loss: 0.6084 - accuracy: 0.6644 - f1_metric: 0.2629 Epoch 49/150 10745/10745 [==============================] - 21s 2ms/step - loss: 0.6088 - accuracy: 0.6608 - f1_metric: 0.2420 Epoch 50/150 10745/10745 [==============================] - 21s 2ms/step - loss: 0.6074 - accuracy: 0.6644 - f1_metric: 0.2598 Epoch 51/150 10745/10745 [==============================] - 19s 2ms/step - loss: 0.6083 - accuracy: 0.6638 - f1_metric: 0.2646 Epoch 52/150 10745/10745 [==============================] - 19s 2ms/step - loss: 0.6065 - accuracy: 0.6637 - f1_metric: 0.2631 0s - loss: 0.6064 - accuracy: 0. Epoch 53/150 10745/10745 [==============================] - 19s 2ms/step - loss: 0.6076 - accuracy: 0.6641 - f1_metric: 0.2831 Epoch 54/150 10745/10745 [==============================] - 21s 2ms/step - loss: 0.6100 - accuracy: 0.6562 - f1_metric: 0.3729 Epoch 55/150 10745/10745 [==============================] - 22s 2ms/step - loss: 0.6071 - accuracy: 0.6641 - f1_metric: 0.2930 Epoch 56/150 10745/10745 [==============================] - 24s 2ms/step - loss: 0.6064 - accuracy: 0.6651 - f1_metric: 0.2836 Epoch 57/150 10745/10745 [==============================] - ETA: 0s - loss: 0.6082 - accuracy: 0.6627 - f1_metric: 0.23 - 27s 2ms/step - loss: 0.6082 - accuracy: 0.6626 - f1_metric: 0.2355 Epoch 58/150 10745/10745 [==============================] - 23s 2ms/step - loss: 0.6068 - accuracy: 0.6635 - f1_metric: 0.2482 Epoch 59/150 10745/10745 [==============================] - 23s 2ms/step - loss: 0.6068 - accuracy: 0.6643 - f1_metric: 0.2535 Epoch 60/150 10745/10745 [==============================] - 23s 2ms/step - loss: 0.6067 - accuracy: 0.6626 - f1_metric: 0.2391 Epoch 61/150 10745/10745 [==============================] - 23s 2ms/step - loss: 0.6059 - accuracy: 0.6645 - f1_metric: 0.2589 Epoch 62/150 10745/10745 [==============================] - 23s 2ms/step - loss: 0.6081 - accuracy: 0.6643 - f1_metric: 0.2713 0s - loss: 0.6082 - accuracy: 0.6642 - f1_metric: Epoch 63/150 10745/10745 [==============================] - 22s 2ms/step - loss: 0.6075 - accuracy: 0.6642 - f1_metric: 0.2753 Epoch 64/150 10745/10745 [==============================] - 22s 2ms/step - loss: 0.6067 - accuracy: 0.6636 - f1_metric: 0.2541 Epoch 65/150 10745/10745 [==============================] - 23s 2ms/step - loss: 0.6077 - accuracy: 0.6651 - f1_metric: 0.2713 Epoch 66/150 10745/10745 [==============================] - 22s 2ms/step - loss: 0.6085 - accuracy: 0.6623 - f1_metric: 0.2540 0s - loss: 0.6085 - accuracy: 0.6620 - f1_ Epoch 67/150 10745/10745 [==============================] - 24s 2ms/step - loss: 0.6059 - accuracy: 0.6656 - f1_metric: 0.2653 Epoch 68/150 10745/10745 [==============================] - 22s 2ms/step - loss: 0.6071 - accuracy: 0.6644 - f1_metric: 0.2711 Epoch 69/150 10745/10745 [==============================] - 19s 2ms/step - loss: 0.6070 - accuracy: 0.6650 - f1_metric: 0.2714 Epoch 70/150 10745/10745 [==============================] - 19s 2ms/step - loss: 0.6083 - accuracy: 0.6639 - f1_metric: 0.2679 1s - Epoch 71/150 10745/10745 [==============================] - 20s 2ms/step - loss: 0.6128 - accuracy: 0.6362 - f1_metric: 0.2749 Epoch 72/150 10745/10745 [==============================] - 20s 2ms/step - loss: 0.6128 - accuracy: 0.6325 - f1_metric: 0.2856 Epoch 73/150 10745/10745 [==============================] - 22s 2ms/step - loss: 0.6139 - accuracy: 0.6261 - f1_metric: 0.2090 1s - loss: 0.6144 - accuracy: - ETA: 0s - loss: 0.614 Epoch 74/150 10745/10745 [==============================] - 23s 2ms/step - loss: 0.6105 - accuracy: 0.6299 - f1_metric: 0.2798 Epoch 75/150 10745/10745 [==============================] - 23s 2ms/step - loss: 0.6120 - accuracy: 0.6322 - f1_metric: 0.2966 Epoch 76/150 10745/10745 [==============================] - 23s 2ms/step - loss: 0.6099 - accuracy: 0.6404 - f1_metric: 0.3304 1s - los - ETA: 0s - loss: 0.6101 - Epoch 77/150 10745/10745 [==============================] - 25s 2ms/step - loss: 0.6077 - accuracy: 0.6623 - f1_metric: 0.3048 Epoch 78/150 10745/10745 [==============================] - 25s 2ms/step - loss: 0.6068 - accuracy: 0.6653 - f1_metric: 0.2665 1s - loss: 0.6 Epoch 79/150 10745/10745 [==============================] - 24s 2ms/step - loss: 0.6074 - accuracy: 0.6646 - f1_metric: 0.2620 Epoch 80/150 10745/10745 [==============================] - 24s 2ms/step - loss: 0.6069 - accuracy: 0.6646 - f1_metric: 0.2679 Epoch 81/150 10745/10745 [==============================] - 23s 2ms/step - loss: 0.6099 - accuracy: 0.6521 - f1_metric: 0.1853 Epoch 82/150 10745/10745 [==============================] - 23s 2ms/step - loss: 0.6098 - accuracy: 0.6620 - f1_metric: 0.2685 Epoch 83/150 10745/10745 [==============================] - 23s 2ms/step - loss: 0.6072 - accuracy: 0.6650 - f1_metric: 0.2876 0s - loss: 0.6073 - accuracy: 0.66 Epoch 84/150 10745/10745 [==============================] - 23s 2ms/step - loss: 0.6085 - accuracy: 0.6618 - f1_metric: 0.2469 Epoch 85/150 10745/10745 [==============================] - 23s 2ms/step - loss: 0.6069 - accuracy: 0.6643 - f1_metric: 0.2607 0s - loss: 0.6069 - accuracy: 0.6643 - f1_metric: 0.26 Epoch 86/150 10745/10745 [==============================] - 23s 2ms/step - loss: 0.6081 - accuracy: 0.6610 - f1_metric: 0.2260 Epoch 87/150 10745/10745 [==============================] - 23s 2ms/step - loss: 0.6076 - accuracy: 0.6645 - f1_metric: 0.2758 Epoch 88/150 10745/10745 [==============================] - ETA: 0s - loss: 0.6065 - accuracy: 0.6649 - f1_metric: 0.25 - 23s 2ms/step - loss: 0.6066 - accuracy: 0.6649 - f1_metric: 0.2554 Epoch 89/150 10745/10745 [==============================] - 23s 2ms/step - loss: 0.6086 - accuracy: 0.6627 - f1_metric: 0.2534 Epoch 90/150 10745/10745 [==============================] - 23s 2ms/step - loss: 0.6061 - accuracy: 0.6648 - f1_metric: 0.2597 0s - loss: 0.6060 - accuracy: 0.6648 - f1_metric: 0. Epoch 91/150 10745/10745 [==============================] - 24s 2ms/step - loss: 0.6082 - accuracy: 0.6648 - f1_metric: 0.2986 Epoch 92/150 10745/10745 [==============================] - 23s 2ms/step - loss: 0.6065 - accuracy: 0.6647 - f1_metric: 0.2496 Epoch 93/150 10745/10745 [==============================] - 22s 2ms/step - loss: 0.6079 - accuracy: 0.6643 - f1_metric: 0.2586 Epoch 94/150 10745/10745 [==============================] - 24s 2ms/step - loss: 0.6076 - accuracy: 0.6630 - f1_metric: 0.2560 Epoch 95/150 10745/10745 [==============================] - 24s 2ms/step - loss: 0.6069 - accuracy: 0.6643 - f1_metric: 0.2511 Epoch 96/150 10745/10745 [==============================] - 23s 2ms/step - loss: 0.6078 - accuracy: 0.6624 - f1_metric: 0.2371 Epoch 97/150 10745/10745 [==============================] - 23s 2ms/step - loss: 0.6077 - accuracy: 0.6638 - f1_metric: 0.2548 Epoch 98/150 10745/10745 [==============================] - 23s 2ms/step - loss: 0.6074 - accuracy: 0.6612 - f1_metric: 0.2175 0s - loss: 0.6075 - accuracy: 0.6614 - f1_metric Epoch 99/150 10745/10745 [==============================] - 23s 2ms/step - loss: 0.6059 - accuracy: 0.6654 - f1_metric: 0.2642 Epoch 100/150 10745/10745 [==============================] - 24s 2ms/step - loss: 0.6068 - accuracy: 0.6635 - f1_metric: 0.2406 Epoch 101/150 10745/10745 [==============================] - 24s 2ms/step - loss: 0.6058 - accuracy: 0.6653 - f1_metric: 0.2649 Epoch 102/150 10745/10745 [==============================] - 24s 2ms/step - loss: 0.6077 - accuracy: 0.6640 - f1_metric: 0.2608 Epoch 103/150 10745/10745 [==============================] - 23s 2ms/step - loss: 0.6063 - accuracy: 0.6657 - f1_metric: 0.2782 Epoch 104/150 10745/10745 [==============================] - 23s 2ms/step - loss: 0.6060 - accuracy: 0.6648 - f1_metric: 0.2535 Epoch 105/150 10745/10745 [==============================] - 23s 2ms/step - loss: 0.6062 - accuracy: 0.6645 - f1_metric: 0.2510 Epoch 106/150 10745/10745 [==============================] - 23s 2ms/step - loss: 0.6061 - accuracy: 0.6663 - f1_metric: 0.2883 Epoch 107/150 10745/10745 [==============================] - 23s 2ms/step - loss: 0.6061 - accuracy: 0.6668 - f1_metric: 0.2864 Epoch 108/150 10745/10745 [==============================] - 25s 2ms/step - loss: 0.6063 - accuracy: 0.6651 - f1_metric: 0.2745 Epoch 109/150 10745/10745 [==============================] - 23s 2ms/step - loss: 0.6095 - accuracy: 0.6576 - f1_metric: 0.2248 Epoch 110/150 10745/10745 [==============================] - 22s 2ms/step - loss: 0.6055 - accuracy: 0.6644 - f1_metric: 0.2500 Epoch 111/150 10745/10745 [==============================] - 23s 2ms/step - loss: 0.6060 - accuracy: 0.6645 - f1_metric: 0.2617 Epoch 112/150 10745/10745 [==============================] - 23s 2ms/step - loss: 0.6060 - accuracy: 0.6651 - f1_metric: 0.2739 Epoch 113/150 10745/10745 [==============================] - 23s 2ms/step - loss: 0.6052 - accuracy: 0.6652 - f1_metric: 0.2581 Epoch 114/150 10745/10745 [==============================] - 24s 2ms/step - loss: 0.6061 - accuracy: 0.6662 - f1_metric: 0.2867 Epoch 115/150 10745/10745 [==============================] - 23s 2ms/step - loss: 0.6066 - accuracy: 0.6631 - f1_metric: 0.2462 Epoch 116/150 10745/10745 [==============================] - 24s 2ms/step - loss: 0.6071 - accuracy: 0.6638 - f1_metric: 0.3069 Epoch 117/150 10745/10745 [==============================] - 24s 2ms/step - loss: 0.6075 - accuracy: 0.6658 - f1_metric: 0.2657 Epoch 118/150 10745/10745 [==============================] - 23s 2ms/step - loss: 0.6056 - accuracy: 0.6652 - f1_metric: 0.2526 Epoch 119/150 10745/10745 [==============================] - 24s 2ms/step - loss: 0.6055 - accuracy: 0.6646 - f1_metric: 0.2577 Epoch 120/150 10745/10745 [==============================] - 23s 2ms/step - loss: 0.6068 - accuracy: 0.6643 - f1_metric: 0.3007 Epoch 121/150 10745/10745 [==============================] - 24s 2ms/step - loss: 0.6054 - accuracy: 0.6652 - f1_metric: 0.2593 Epoch 122/150 10745/10745 [==============================] - 23s 2ms/step - loss: 0.6073 - accuracy: 0.6641 - f1_metric: 0.2520 Epoch 123/150 10745/10745 [==============================] - 22s 2ms/step - loss: 0.6057 - accuracy: 0.6639 - f1_metric: 0.2428 0s - loss: 0.6057 - accuracy: Epoch 124/150 10745/10745 [==============================] - 22s 2ms/step - loss: 0.6060 - accuracy: 0.6624 - f1_metric: 0.2371 Epoch 125/150 10745/10745 [==============================] - 23s 2ms/step - loss: 0.6118 - accuracy: 0.6624 - f1_metric: 0.2519 Epoch 126/150 10745/10745 [==============================] - 23s 2ms/step - loss: 0.6085 - accuracy: 0.6630 - f1_metric: 0.2569 Epoch 127/150 10745/10745 [==============================] - 23s 2ms/step - loss: 0.6069 - accuracy: 0.6655 - f1_metric: 0.2755 Epoch 128/150 10745/10745 [==============================] - 24s 2ms/step - loss: 0.6088 - accuracy: 0.6608 - f1_metric: 0.2245 1s - loss: 0 Epoch 129/150 10745/10745 [==============================] - 23s 2ms/step - loss: 0.6072 - accuracy: 0.6657 - f1_metric: 0.2625 Epoch 130/150 10745/10745 [==============================] - 23s 2ms/step - loss: 0.6086 - accuracy: 0.6641 - f1_metric: 0.2477 Epoch 131/150 10745/10745 [==============================] - 24s 2ms/step - loss: 0.6065 - accuracy: 0.6646 - f1_metric: 0.2870 Epoch 132/150 10745/10745 [==============================] - 23s 2ms/step - loss: 0.6061 - accuracy: 0.6649 - f1_metric: 0.2629 Epoch 133/150 10745/10745 [==============================] - 24s 2ms/step - loss: 0.6054 - accuracy: 0.6656 - f1_metric: 0.2596 0s - loss: 0.6053 - accuracy: 0.6656 - f1_metr Epoch 134/150 10745/10745 [==============================] - 23s 2ms/step - loss: 0.6052 - accuracy: 0.6647 - f1_metric: 0.2392 Epoch 135/150 10745/10745 [==============================] - 23s 2ms/step - loss: 0.6063 - accuracy: 0.6654 - f1_metric: 0.2879 Epoch 136/150 10745/10745 [==============================] - 24s 2ms/step - loss: 0.6057 - accuracy: 0.6656 - f1_metric: 0.2954 Epoch 137/150 10745/10745 [==============================] - 23s 2ms/step - loss: 0.6055 - accuracy: 0.6654 - f1_metric: 0.2748 0s - loss: 0.6052 - accuracy: 0.6659 - f1_metr Epoch 138/150 10745/10745 [==============================] - 23s 2ms/step - loss: 0.6057 - accuracy: 0.6640 - f1_metric: 0.2520 Epoch 139/150 10745/10745 [==============================] - 24s 2ms/step - loss: 0.6054 - accuracy: 0.6644 - f1_metric: 0.2446 1s - loss: 0.6048 - accuracy: 0.6651 - - ETA: Epoch 140/150 10745/10745 [==============================] - 25s 2ms/step - loss: 0.6079 - accuracy: 0.6605 - f1_metric: 0.2652 Epoch 141/150 10745/10745 [==============================] - 23s 2ms/step - loss: 0.6065 - accuracy: 0.6658 - f1_metric: 0.3009 0s - loss: 0.6070 - - ETA: 0s - loss: 0.6065 - accuracy: 0.6658 - f1_metric: 0.30 Epoch 142/150 10745/10745 [==============================] - 23s 2ms/step - loss: 0.6076 - accuracy: 0.6640 - f1_metric: 0.2451 Epoch 143/150 10745/10745 [==============================] - 23s 2ms/step - loss: 0.6066 - accuracy: 0.6637 - f1_metric: 0.2536 Epoch 144/150 10745/10745 [==============================] - 23s 2ms/step - loss: 0.6058 - accuracy: 0.6646 - f1_metric: 0.2663 Epoch 145/150 10745/10745 [==============================] - 23s 2ms/step - loss: 0.6061 - accuracy: 0.6646 - f1_metric: 0.2647 0s - loss: 0.6061 - accuracy: 0.6643 - Epoch 146/150 10745/10745 [==============================] - 23s 2ms/step - loss: 0.6071 - accuracy: 0.6651 - f1_metric: 0.3173 Epoch 147/150 10745/10745 [==============================] - 24s 2ms/step - loss: 0.6081 - accuracy: 0.6660 - f1_metric: 0.3018 2s - loss: 0.6083 - accuracy: - ETA: 2s - ETA: 0s - loss: 0.6081 - Epoch 148/150 10745/10745 [==============================] - 27s 3ms/step - loss: 0.6054 - accuracy: 0.6659 - f1_metric: 0.2581 0s - loss: 0.6054 - accuracy: 0.6659 - f1_metric: 0.25 Epoch 149/150 10745/10745 [==============================] - 23s 2ms/step - loss: 0.6049 - accuracy: 0.6645 - f1_metric: 0.2388 Epoch 150/150 10745/10745 [==============================] - 23s 2ms/step - loss: 0.6065 - accuracy: 0.6644 - f1_metric: 0.2897
<tensorflow.python.keras.callbacks.History at 0x161305b1d90>
model.evaluate(X_test, y_test) # parameters were not tuned, so results are bad
374/374 [==============================] - 1s 1ms/step - loss: 0.6059 - accuracy: 0.6647 - f1_metric: 0.4049
[0.6058630347251892, 0.6647123098373413, 0.4049013555049896]
model.predict(observation) # prediction
array([[0.3713342]], dtype=float32)
exp_nn = dx.Explainer(model, X_train, y_train, label='neural_network')
Preparation of a new explainer is initiated -> data : 107447 rows 6 cols -> target variable : Parameter 'y' was a pandas.DataFrame. Converted to a numpy.ndarray. -> target variable : 107447 values -> model_class : tensorflow.python.keras.engine.sequential.Sequential (default) -> label : neural_network -> predict function : <function yhat_tf_regression at 0x0000016122F04DC0> will be used (default) -> predict function : Accepts pandas.DataFrame and numpy.ndarray. -> predicted values : min = 0.121, mean = 0.341, max = 0.581 -> model type : regression will be used (default) -> residual function : difference between y and yhat (default) -> residuals : min = -0.581, mean = 0.0293, max = 0.879 -> model_info : package tensorflow A new explainer has been created!
nn_profile = exp_nn.predict_profile(new_observation = observation)
# plot Ceteris Paribus profile
nn_profile.plot(variables = ['lead_time', 'children', 'babies', 'booking_changes'])
# in this model chidren and babies variables did not matter at all
# bigger lead time(>81 days) increased probability of cancelation, so did lead time > 235 days
# alternatively bigger number of booking changes decreased chances of clients not coming
Calculating ceteris paribus: 100%|███████████████████████████████████████████████████████| 6/6 [00:00<00:00, 8.27it/s]
forest = RandomForestClassifier(max_depth=2, random_state=0)
forest.fit(X_train, y_train)
RandomForestClassifier(max_depth=2, random_state=0)
forest.predict(observation) # prediction
array([0], dtype=int64)
exp_forest = dx.Explainer(forest, X_train, y_train, label='random_forest')
Preparation of a new explainer is initiated -> data : 107447 rows 6 cols -> target variable : Parameter 'y' was a pandas.DataFrame. Converted to a numpy.ndarray. -> target variable : 107447 values -> model_class : sklearn.ensemble._forest.RandomForestClassifier (default) -> label : random_forest -> predict function : <function yhat_proba_default at 0x0000016122F04C10> will be used (default) -> predict function : Accepts pandas.DataFrame and numpy.ndarray. -> predicted values : min = 0.139, mean = 0.37, max = 0.45 -> model type : classification will be used (default) -> residual function : difference between y and yhat (default) -> residuals : min = -0.45, mean = -3.51e-05, max = 0.861 -> model_info : package sklearn A new explainer has been created!
forest_profile = exp_forest.predict_profile(new_observation = observation)
forest_profile.plot(variables = ['lead_time', 'children', 'babies', 'booking_changes'])
# this model was not so dependent on lead time, as well as on number of children and babies
# none zero amount of booking changes caused the decrease of the cancelation probability
# but it wasn't so significant as by previous model
Calculating ceteris paribus: 100%|███████████████████████████████████████████████████████| 6/6 [00:00<00:00, 23.74it/s]
adaboost = AdaBoostClassifier(n_estimators=100, random_state=0)
adaboost.fit(X_train, y_train)
AdaBoostClassifier(n_estimators=100, random_state=0)
adaboost.predict(observation) # prediction
array([0], dtype=int64)
exp_adaboost = dx.Explainer(forest, X_train, y_train, label='adaboost')
Preparation of a new explainer is initiated -> data : 107447 rows 6 cols -> target variable : Parameter 'y' was a pandas.DataFrame. Converted to a numpy.ndarray. -> target variable : 107447 values -> model_class : sklearn.ensemble._forest.RandomForestClassifier (default) -> label : adaboost -> predict function : <function yhat_proba_default at 0x0000016122F04C10> will be used (default) -> predict function : Accepts pandas.DataFrame and numpy.ndarray. -> predicted values : min = 0.139, mean = 0.37, max = 0.45 -> model type : classification will be used (default) -> residual function : difference between y and yhat (default) -> residuals : min = -0.45, mean = -3.51e-05, max = 0.861 -> model_info : package sklearn A new explainer has been created!
adaboost_profile = exp_adaboost.predict_profile(new_observation = observation)
adaboost_profile.plot(variables = ['lead_time', 'children', 'babies', 'booking_changes'])
# ada boost doesn't really differ from random forest
# only numbers of babies has more visible drop than earlier
Calculating ceteris paribus: 100%|███████████████████████████████████████████████████████| 6/6 [00:00<00:00, 26.78it/s]
In general models demonstrated similar behavior(bigger lead time means increase and bigger number of booking changes means decrease of the cancelation probability) and had similar significant variables. Neural network however demonstrated a bit more complex decision changes(it had several visible 'jumping' points not only one). Of course the results would be more reliable if models were better trained(but hyperparameters tuning is out of scope for this homework).